This R Markdown script analyses data from the PAL (probabilistic associative learning) task of the EMBA project using a drift diffusion model implemented in ggdmc.
# Global analysis settings ------------------------------------------------
# number of posterior-predictive simulations drawn per check
nsim <- 250
# MCMC iterations and warmup shared by all brms models below
iter <- 3000
warm <- 1000
# fix the RNG seed so all simulations are reproducible
set.seed(2468)
The following packages are used in this RMarkdown file:
## [1] "R version 4.5.1 (2025-06-13)"
## [1] "knitr version 1.50"
## [1] "brms version 2.22.0"
## [1] "ggdmc version 0.2.6.2"
## [1] "tidyverse version 2.0.0"
## [1] "sjPlot version 2.9.0"
## [1] "ggpubr version 0.6.1"
## [1] "ggrain version 0.0.4"
## [1] "rstatix version 0.7.2"
## [1] "easystats version 0.7.5"
## [1] "BayesFactor version 0.9.12.4.7"
## [1] "bayesplot version 1.13.0"
## [1] "bayestestR version 0.17.0"
# build the model
# build the model
# DDM design ("rd" = Ratcliff diffusion): only the drift rate v varies by
# phase P (pre/vol/post); boundary a, start point z and non-decision time t0
# are shared across conditions. match.map pairs stimulus s1 with response r1
# and s2 with r2 (accuracy coding). The inter-trial variabilities (st0, sz,
# sv) and the drift bias d are fixed to 0, leaving the six free parameters
# listed in the printed p.vector below.
model = BuildModel(
p.map = list(a = "1", v = "P", z = "1", d = "1", sz = "1", sv = "1",
t0 = "1", st0 = "1"),
match.map = list(M = list(s1 = "r1", s2 = "r2")),
factors = list(S = c("s1", "s2"), P = c("pre", "vol", "post")),
constants = c(st0 = 0, d = 0, sz = 0, sv = 0),
responses = c("r1", "r2"),
type = "rd")
##
## Parameter vector names are: ( see attr(,"p.vector") )
## [1] "a" "v.pre" "v.vol" "v.post" "z" "t0"
##
## Constants are (see attr(,"constants") ):
## st0 d sz sv
## 0 0 0 0
##
## Model type = rd
# number of free parameters (a, v.pre, v.vol, v.post, z, t0)
npar = length(GetPNames(model))
nop = 3 # number of phases
# determine the priors
pop.mean = c(a = 2.0, v.pre = 2.5, v.vol = 2.0, v.post = 2.0, z = 0.5, t0 = 0.3)
pop.scale = c(a = 0.5, v.pre = 0.5, v.vol = 0.5, v.post = 0.5, z = 0.1, t0 = 0.05)
# NOTE(review): lower/upper are UNNAMED and therefore rely on the parameter
# order returned by GetPNames(): a first, then the nop drift rates, then the
# remaining npar-1-nop parameters (z, t0) bounded on [0, 1]
pop.lower = c(0, rep(-5, nop), rep(0, npar-1-nop))
pop.upper = c(5, rep( 7, nop), rep(1, npar-1-nop))
# fitting prior: truncated normals centred on pop.mean but 5x wider than the
# data-generating scale, so the fit is only weakly constrained
p.prior = BuildPrior(
dists = rep("tnorm", npar),
p1 = pop.mean,
p2 = pop.scale*5,
lower = pop.lower,
upper = pop.upper)
# hyper-level location prior: identical to the subject-level prior
mu.prior = p.prior
plot(p.prior)
# hyper-level scale prior: beta(1, 1), i.e. uniform on [0, 2], per parameter
sigma.prior = BuildPrior(
dists = rep("beta", npar),
p1 = rep(1, npar),
p2 = rep(1, npar),
upper = rep(2, npar))
names(sigma.prior) = GetPNames(model)
# collect the priors for the hierarchical model
priors = list(pprior = p.prior, location = mu.prior, scale = sigma.prior)
# Parameter-recovery check: simulate data from known parameters, fit the
# hierarchical model, and later verify the true values are recovered.
# The fit is cached on disk and only recomputed when the file is missing.
if (!file.exists(file.path(ddm_dir, "fit_rec.RData"))) {
  # setup for the simulation
  # (FIX: removed the unused local `not = 336` — it was never referenced)
  nos = 22 # subjects per group
  npt = 72 # min trials per phase; simulate() draws npt trials per design cell
  # set some narrower priors (the true data-generating scale) for the data creation
  pop.prior = BuildPrior(
    dists = rep("tnorm", npar),
    p1 = pop.mean,
    p2 = pop.scale,
    lower = pop.lower,
    upper = pop.upper)
  # simulate the data and bind it to the model as a data-model instance
  sim.dat = simulate(model, prior = pop.prior, nsim = npt, nsub = nos)
  dmi.rec = BuildDMI(sim.dat, model)
  # burn-in samples, then a thinned production run to reduce autocorrelation
  fit.rec0 = StartNewsamples(data = dmi.rec, prior = priors)
  fit.rec = run(fit.rec0, thin = 16, nmc = 1000)
  save(fit.rec, sim.dat, dmi.rec, file = file.path(ddm_dir, "fit_rec.RData"))
} else {
  load(file.path(ddm_dir, "fit_rec.RData"))
}
# create some plots to check the model
# p1/p2: hyper-level traces; p3: subject-level traces; p4: hyper-level
# densities (pll = FALSE presumably switches from posterior log-likelihood
# to parameter traces — TODO confirm against ggdmc's plot documentation)
p1 = plot(fit.rec, hyper = TRUE) + theme_bw() + theme(legend.position = "none")
p2 = plot(fit.rec, hyper = TRUE, pll = FALSE) + theme_bw() + theme(legend.position = "none")
p3 = plot(fit.rec) + theme_bw() + theme(legend.position = "none")
p4 = plot(fit.rec, hyper = TRUE, pll = FALSE, den = TRUE) + theme_bw() + theme(legend.position = "none")
# now plot them all together
ggarrange(p1, p2, p3, p4, ncol = 1, heights = c(0.25, 0.75, 1.2, 0.75))
# check the rhats ("hyper" is the hyper-level chain; all values should be < 1.1)
rhats = hgelman(fit.rec, verbose = TRUE)
## hyper 12 19 7 14 2 22 15 10 8 1 6 21
## 1.07 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## 11 4 3 9 18 20 5 17 13 16
## 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
# compare estimates and real values
# true per-subject parameters that simulate() drew from pop.prior
ps = attr(sim.dat, "parameters")
# hyper-level recovery: type 1 = location (group means), type 2 = scale (SDs)
# (FIX: spelled out TRUE instead of the reassignable shortcut T)
hest1 = summary(fit.rec, hyper = TRUE, type = 1, recovery = TRUE, ps = pop.mean)
hest2 = summary(fit.rec, hyper = TRUE, type = 2, recovery = TRUE, ps = pop.scale)
round(hest1, 2)
## a t0 v.post v.pre v.vol z
## True 2.00 0.30 2.00 2.50 2.00 0.50
## 2.5% Estimate 1.71 0.29 1.90 2.22 1.74 0.43
## 50% Estimate 1.91 0.31 2.09 2.45 1.97 0.48
## 97.5% Estimate 2.12 0.33 2.27 2.67 2.19 0.53
## Median-True -0.09 0.01 0.09 -0.05 -0.03 -0.02
round(hest2, 2)
## a t0 v.post v.pre v.vol z
## True 0.50 0.05 0.50 0.50 0.50 0.10
## 2.5% Estimate 0.34 0.04 0.29 0.36 0.36 0.08
## 50% Estimate 0.46 0.05 0.40 0.50 0.49 0.11
## 97.5% Estimate 0.66 0.07 0.59 0.72 0.71 0.16
## Median-True -0.04 0.00 -0.10 0.00 -0.01 0.01
# subject-level recovery against the true per-subject parameters
summary(fit.rec, recovery = TRUE, ps = ps)
## a v.pre v.vol v.post z t0
## Mean 1.91 2.45 1.97 2.09 0.48 0.31
## True 1.89 2.39 1.97 2.07 0.47 0.31
## Diff -0.02 -0.06 0.00 -0.02 0.00 0.00
## Sd 0.43 0.45 0.45 0.36 0.10 0.05
## True 0.43 0.51 0.45 0.42 0.10 0.05
## Diff 0.00 0.05 -0.01 0.05 0.00 0.00
## a v.pre v.vol v.post z
## Mean 1.913892482 2.44614272 1.967750646 2.08692429 0.476463946
## True 1.894501506 2.39001639 1.969195572 2.06762211 0.473922741
## Diff -0.019390976 -0.05612633 0.001444926 -0.01930218 -0.002541205
## Sd 0.433868645 0.45379187 0.450509727 0.36397404 0.102637469
## True 0.432998562 0.50552409 0.445240317 0.41895476 0.101584369
## Diff -0.000870083 0.05173223 -0.005269410 0.05498072 -0.001053100
## t0
## Mean 0.308410419
## True 0.310418194
## Diff 0.002007775
## Sd 0.047261481
## True 0.046147437
## Diff -0.001114044
All looks good! Thus, we can move on to fitting the actual data.
We compute a hierarchical model separately for our three groups.
# Fit one hierarchical DDM per diagnosis group, with per-group caching.
# check if it all already exists
if (!file.exists(file.path(ddm_dir, "fit_hddm.RData"))) {
  # load in the data
  df = read_csv("../data/PAL-ADHD_data.csv", show_col_types = FALSE) %>%
    # filter out super long and super fast reactions (rt is in ms here)
    filter(rt > 100 & rt < 1500) %>%
    # add all the trial information (merge joins on the shared columns)
    merge(., read_csv("../data/PAL_scheme.csv", show_col_types = F)) %>%
    mutate(
      # response coding: r1 = chose "positive", r2 = chose "negative"
      R = if_else((acc & emo == "positive") | (!acc & emo == "negative"),
                  "r1", "r2"),
      P = case_match(phase, "prevolatile" ~ "pre", "volatile" ~ "vol", "postvolatile" ~ "post"),
      S = if_else(emo == "positive", "s1", "s2"),
      s = as.factor(subID),
      RT = rt/1000 # convert to seconds for ggdmc
    ) %>% select(diagnosis, adhd.meds.bin, s, S, P, R, RT)
  # initialise output list and dataframe
  m = list()
  df.part = data.frame()
  groups = unique(df$diagnosis)
  for (group in groups) {
    if (!file.exists(file.path(ddm_dir, sprintf("m_%s.rds", group)))) {
      # select part of the data
      df.grp = df %>% filter(diagnosis == group) %>%
        select(-diagnosis) %>% droplevels()
      # create the data model instance
      dmi = BuildDMI(df.grp, model)
      # refit until the chains converge (all rhats < 1.1)
      rhat = 5
      count = 0
      # FIX: `thin` was referenced below but never defined; start at 16 so the
      # first attempt uses thin = 32, matching the previous hard-coded value
      thin = 16
      while (rhat >= 1.1) {
        # increase counter and thinning
        count = count + 1
        thin = thin * 2
        # guard against an endless refit loop on pathological data
        if (count > 6) {
          stop(sprintf("No convergence for group %s after %d attempts", group, count - 1))
        }
        print(sprintf("%d, %d: %s", count, thin, group))
        # start sampling
        fit0 = StartNewsamples(data = dmi, prior = priors)
        # run more to get it to be stable
        fit = run(fit0, thin = thin, nmc = 1000) # ~33min
        # check the rhats
        rhats = hgelman(fit, verbose = TRUE)
        rhat = max(rhats)
      }
    } else {
      # if it already exists, just load the fit
      fit = readRDS(file.path(ddm_dir, sprintf("m_%s.rds", group)))
    }
    # if rhats are fine, then save the "ultimate" fit
    saveRDS(fit, file.path(ddm_dir, sprintf("m_%s.rds", group)))
    # FIX: inside a loop the summary must be printed explicitly — the bare
    # call was a silent no-op (auto-printing happens only at top level)
    print(summary(fit, hyper = TRUE, hci = TRUE)) # location and scale for hypers
    part = summary(fit, hyper = FALSE) # participant parameters
    # collect per-participant estimates, dropping the group-mean summary row
    df.part = rbind(df.part,
                    as.data.frame(part) %>% rownames_to_column(var = "s") %>% filter(s != "Mean"))
    # add to list
    m[[group]] = fit
  }
  save(m, df, df.part,
       file = file.path(ddm_dir, "fit_hddm.RData"))
} else {
  load(file.path(ddm_dir, "fit_hddm.RData"))
}
# convergence-check plots for each group's hierarchical fit
p = list()
for (i in seq_along(m)) {
  # FIX: the plots previously used `fit.rec` (the recovery fit), so every
  # group displayed identical panels; plot the group-specific fit instead
  fit.i = m[[i]]
  # create some plots to check the model
  p1 = plot(fit.i, hyper = TRUE) + theme_bw() + theme(legend.position = "none")
  p2 = plot(fit.i, hyper = TRUE, pll = FALSE) + theme_bw() + theme(legend.position = "none")
  p3 = plot(fit.i) + theme_bw() + theme(legend.position = "none")
  p4 = plot(fit.i, hyper = TRUE, pll = FALSE, den = TRUE) + theme_bw() + theme(legend.position = "none")
  # now plot them all together
  p[[i]] = ggarrange(p1, p2, p3, p4, ncol = 1, heights = c(0.25, 0.75, 1.2, 0.75))
}
# add a bold title naming each group to its set of convergence plots
annotate_figure(p[[1]],
top = text_grob(sprintf("Model checks: %s", names(m)[1]),
face = "bold", size = 14))
annotate_figure(p[[2]],
top = text_grob(sprintf("Model checks: %s", names(m)[2]),
face = "bold", size = 14))
annotate_figure(p[[3]],
top = text_grob(sprintf("Model checks: %s", names(m)[3]),
face = "bold", size = 14))
# add diagnosis
# merge() joins on the shared column — here the participant id `s`
df.part = merge(df.part, df %>% select(s, diagnosis, adhd.meds.bin) %>% distinct())
# convert to long format for drift rates: one row per participant x phase
df.lng = df.part %>%
pivot_longer(cols = starts_with("v."), values_to = "v") %>%
mutate(
# translate the parameter names back into ordered phase labels
phase = case_match(name,
"v.pre" ~ "prevolatile",
"v.vol" ~ "volatile",
"v.post" ~ "postvolatile"),
phase = factor(phase, levels = c("prevolatile", "volatile", "postvolatile"))
) %>%
mutate_if(is.character, as.factor)
# set and print the contrasts
# sum-to-zero coding: each coefficient is a deviation from the grand mean;
# the last level (COMP / postvolatile) equals minus the sum of the others
contrasts(df.lng$diagnosis) = contr.sum(3)
contrasts(df.lng$diagnosis)
## [,1] [,2]
## ADHD 1 0
## BOTH 0 1
## COMP -1 -1
contrasts(df.lng$phase) = contr.sum(3)
contrasts(df.lng$phase)
## [,1] [,2]
## prevolatile 1 0
## volatile 0 1
## postvolatile -1 -1
# set the formula: drift rate by diagnosis x phase with a participant intercept
f.v = brms::bf(v ~ diagnosis * phase + (1|s))
# set weakly informative priors
# NOTE(review): this overwrites the ggdmc `priors` list defined earlier —
# any ggdmc refit after this point would need that list to be rebuilt
priors = c(
prior(normal(2, 1.50), class = Intercept),
prior(normal(0, 0.50), class = sigma),
prior(normal(0, 0.50), class = sd),
prior(normal(0, 0.50), class = b)
)
As the next step, we fit the model, check whether there are divergence or rhat issues, and then check whether the chains have converged.
# fit the final model
# (df.lng is matched positionally to brm's `data` argument; `file =` caches
# the fit on disk, so reruns load the stored model instead of resampling)
m.v = brm(f.v, seed = 1234,
df.lng, prior = priors,
iter = iter, warmup = warm,
backend = "cmdstanr", threads = threading(2),
file = file.path(brms_dir, "m_ddm_v"),
save_pars = save_pars(all = TRUE)
)
# sampler diagnostics: divergences, tree-depth saturation and E-BFMI
rstan::check_hmc_diagnostics(m.v$fit)
##
## Divergences:
## 0 of 8000 iterations ended with a divergence.
##
## Tree depth:
## 0 of 8000 iterations saturated the maximum tree depth of 10.
##
## Energy:
## E-BFMI indicated no pathological behavior.
# check that rhats are below 1.01
sum(brms::rhat(m.v) >= 1.01, na.rm = T)
## [1] 0
# check the trace plots of all fixed effects (names starting with "b_")
post.draws = as_draws_df(m.v)
mcmc_trace(post.draws, regex_pars = "^b_",
facet_args = list(ncol = 3)) +
scale_x_continuous(breaks=scales::pretty_breaks(n = 3)) +
scale_y_continuous(breaks=scales::pretty_breaks(n = 3))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
This model has no pathological behaviour with E-BFMI, no divergent
sample and no rhats that are higher or equal to 1.01. Therefore, we go
ahead and perform our posterior predictive checks.
# Posterior predictive checks for m.v: overall density overlay plus
# group-wise distributions of predicted means vs the observed means.
post.pred <- posterior_predict(m.v, ndraws = nsim)
# shared styling: plain theme without a legend
no.legend <- theme_bw() + theme(legend.position = "none")
# overall fit of predicted vs observed drift-rate distribution
panel.dens <- pp_check(m.v, ndraws = nsim) + no.legend
# distributions of means compared to the real values per diagnosis group
panel.diag <- ppc_stat_grouped(df.lng$v, post.pred, df.lng$diagnosis) + no.legend
# ... and per phase
panel.phase <- ppc_stat_grouped(df.lng$v, post.pred, df.lng$phase) + no.legend
combined <- ggarrange(panel.dens, panel.diag, panel.phase,
                      ncol = 1, labels = "AUTO")
annotate_figure(combined, top = text_grob("Posterior predictive checks",
                                          face = "bold", size = 14))
Now that we are convinced that we can trust our model, we have a look at its estimates and use the hypothesis function to assess our hypotheses and perform exploratory tests.
# print a summary of the drift-rate model
summary(m.v)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: v ~ diagnosis * phase + (1 | s)
## Data: df.lng (Number of observations: 198)
## Draws: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup draws = 8000
##
## Multilevel Hyperparameters:
## ~s (Number of levels: 66)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.59 0.06 0.49 0.72 1.00 1552 3009
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.31 0.08 2.16 2.47 1.01 912 1772
## diagnosis1 -0.27 0.11 -0.48 -0.07 1.01 883 1820
## diagnosis2 -0.05 0.11 -0.26 0.16 1.01 900 1974
## phase1 -0.03 0.03 -0.09 0.04 1.00 6249 5825
## phase2 0.04 0.03 -0.02 0.11 1.00 6359 5664
## diagnosis1:phase1 0.08 0.05 -0.01 0.17 1.00 5341 5891
## diagnosis2:phase1 -0.01 0.05 -0.11 0.08 1.00 5088 5845
## diagnosis1:phase2 0.01 0.05 -0.08 0.11 1.00 5421 5318
## diagnosis2:phase2 -0.03 0.05 -0.12 0.06 1.00 4940 5486
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.33 0.02 0.29 0.38 1.00 4744 5283
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# get the estimates and compute group comparisons
# with sum-to-zero contrasts the group means are:
# ADHD = b0 + d1, BOTH = b0 + d2, COMP = b0 - d1 - d2 (same logic for phases)
df.m.v = post.draws %>%
select(starts_with("b_")) %>%
mutate(
ADHD = b_Intercept + b_diagnosis1,
BOTH = b_Intercept + b_diagnosis2,
COMP = b_Intercept - b_diagnosis1 - b_diagnosis2,
b_COMP = - b_diagnosis1 - b_diagnosis2,
b_postvolatile = - b_phase1 - b_phase2,
`e1_ADHDvCOMP` = ADHD - COMP,
`e2_BOTHvCOMP` = BOTH - COMP,
`e3_ADHDvBOTH` = ADHD - BOTH,
# volatile minus the mean of the two stable phases; simplifies to 1.5*b_phase2
`e4_VOLvSTBL` = (b_Intercept + b_phase2) - ((b_Intercept + b_phase1) + (b_Intercept + b_postvolatile))/2,
# COMP's volatility effect minus the mean ADHD/BOTH volatility effect;
# the algebra collapses to -2.25 times the summed interaction terms
`e5_diagVOLvSTBL` = -2.25*(`b_diagnosis1:phase2` + `b_diagnosis2:phase2`)
)
# plot the posterior distributions of all coefficients except the intercept
df.m.v %>%
select(starts_with("b_")) %>%
pivot_longer(cols = starts_with("b_"), names_to = "coef", values_to = "estimate") %>%
subset(!startsWith(coef, "b_Int")) %>%
mutate(
# strip the "b_" prefix and translate contrast names into readable labels
coef = substr(coef, 3, nchar(coef)),
coef = str_replace_all(coef, ":", " x "),
coef = str_replace_all(coef, "diagnosis1", "ADHD"),
coef = str_replace_all(coef, "diagnosis2", "BOTH"),
coef = str_replace_all(coef, "phase1", "prevolatile"),
coef = str_replace_all(coef, "phase2", "volatile"),
coef = fct_reorder(coef, desc(estimate))
) %>%
group_by(coef) %>%
mutate(
# a coefficient counts as "credible" when its 95% interval excludes zero
cred = case_when(
(mean(estimate) < 0 & quantile(estimate, probs = 0.975) < 0) |
(mean(estimate) > 0 & quantile(estimate, probs = 0.025) > 0) ~ "credible",
T ~ "not credible"
)
) %>% ungroup() %>%
ggplot(aes(x = estimate, y = coef, fill = cred)) +
geom_vline(xintercept = 0, linetype = 'dashed') +
ggdist::stat_halfeye(alpha = 0.7) + ylab(NULL) + theme_bw() +
scale_fill_manual(values = c(c_dark, c_light)) + theme(legend.position = "none")
# get the design matrix to figure out how to set the contrasts
# (inspection aid only — df.des is not referenced further below)
df.des = cbind(df.lng %>% select(diagnosis, phase),
model.matrix(~ diagnosis * phase, data = df.lng)) %>%
ungroup() %>% distinct()
# COMP > ADHD, i.e. 0 > ADHD - COMP = 2*d1 + d2 (one-sided, 97.5% level)
e1 = hypothesis(m.v, "0 > 2*diagnosis1 + diagnosis2", alpha = 0.025)
e1$hypothesis
## Hypothesis Estimate Est.Error CI.Lower CI.Upper
## 1 (0)-(2*diagnosis1+diagnosis2) > 0 0.5876208 0.1809524 0.2371597 0.9484197
## Evid.Ratio Post.Prob Star
## 1 1332.333 0.99925 *
# COMP > BOTH, i.e. 0 > BOTH - COMP = d1 + 2*d2
e2 = hypothesis(m.v, "0 > diagnosis1 + 2*diagnosis2", alpha = 0.025)
e2$hypothesis
## Hypothesis Estimate Est.Error CI.Lower CI.Upper
## 1 (0)-(diagnosis1+2*diagnosis2) > 0 0.364679 0.1824531 0.01520972 0.7326949
## Evid.Ratio Post.Prob Star
## 1 48.68944 0.979875 *
# ADHD < BOTH
e3 = hypothesis(m.v, "diagnosis1 < diagnosis2", alpha = 0.025)
e3$hypothesis
## Hypothesis Estimate Est.Error CI.Lower CI.Upper
## 1 (diagnosis1)-(diagnosis2) < 0 -0.2229418 0.1833993 -0.5794976 0.1443912
## Evid.Ratio Post.Prob Star
## 1 8.070295 0.88975
# volatile versus stable phases
e4 = hypothesis(m.v, "1.5*phase2 > 0", alpha = 0.025) # volatile - mean(prevolatile, postvolatile)
e4
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (1.5*phase2) > 0 0.06 0.05 -0.04 0.16 8.62 0.9
## Star
## 1
## ---
## 'CI': 95%-CI for one-sided and 97.5%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 97.5%;
## for two-sided hypotheses, the value tested against lies outside the 97.5%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# differences in volatility effect depending on ADHD diagnosis
# NOTE(review): unlike e1-e4 this call omits alpha = 0.025, so the default
# alpha = 0.05 applies (hence the 90% one-sided CI in the printout below) —
# confirm whether this difference in CI level is intended
e5 = hypothesis(m.v, "-2.25*(diagnosis1:phase2 + diagnosis2:phase2) > 0") # COMP(vol-stable) - mean(ADHD(vol-stable), BOTH(vol-stable))
e5
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (-2.25*(diagnosis... > 0 0.04 0.11 -0.14 0.21 1.8
## Post.Prob Star
## 1 0.64
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# equivalence: fraction of each contrast's 95% HDI inside the model's ROPE
equivalence_test(df.m.v %>% select(starts_with("e")),
range = rope_range(m.v))
## # Test for Practical Equivalence
##
## ROPE: [-0.07 0.07]
##
## Parameter | H0 | inside ROPE | 95% HDI
## ----------------------------------------------------------
## e1_ADHDvCOMP | Rejected | 0.00 % | [-0.95, -0.24]
## e2_BOTHvCOMP | Undecided | 2.72 % | [-0.73, -0.02]
## e3_ADHDvBOTH | Undecided | 15.57 % | [-0.58, 0.14]
## e4_VOLvSTBL | Undecided | 57.29 % | [-0.04, 0.16]
## e5_diagVOLvSTBL | Undecided | 49.25 % | [-0.17, 0.25]
# calculate effect sizes (Cohen's-d-like: contrast / pooled SD)
df.effect = post.draws %>%
# square the random-effect SD and residual sigma to obtain variances
mutate(across(starts_with("sd")|starts_with("sigma"), ~.^2)) %>%
mutate(
# pooled SD = sqrt of the summed variance components
# (name kept for compatibility, but note this is an SD, not a variance)
sumvar = sqrt(rowSums(select(., starts_with("sd")|starts_with("sigma")))),
e1 = -(2*`b_diagnosis1` + `b_diagnosis2`) / sumvar, # COMP - ADHD
e2 = -(`b_diagnosis1` + 2*`b_diagnosis2`) / sumvar, # COMP - BOTH
e3 = (-`b_diagnosis1` + `b_diagnosis2`) / sumvar, # BOTH - ADHD
e4 = 1.5*b_phase2 / sumvar, # volatile - mean(stable phases)
e5 = -2.25*(`b_diagnosis1:phase2` + `b_diagnosis2:phase2`) / sumvar
)
# summarise each effect size with mean and CI
# (lower_ci/upper_ci and interpret_cohens_d come from the attached packages —
# presumably sjstats/effectsize; TODO confirm)
kable(df.effect %>% select(starts_with("e")|starts_with("h")) %>%
pivot_longer(cols = everything(), values_to = "estimate") %>%
group_by(name) %>%
summarise(
ci.lo = lower_ci(estimate),
mean = mean(estimate),
ci.hi = upper_ci(estimate),
interpret = interpret_cohens_d(mean)
), digits = 3
)
| name | ci.lo | mean | ci.hi | interpret |
|---|---|---|---|---|
| e1 | 0.335 | 0.872 | 1.414 | large |
| e2 | 0.022 | 0.541 | 1.088 | medium |
| e3 | -0.210 | 0.331 | 0.861 | small |
| e4 | -0.054 | 0.093 | 0.245 | very small |
| e5 | -0.248 | 0.057 | 0.383 | very small |
# sensitivity analysis: flag extreme drift rates (v > 6) as missing
df.lng = df.lng %>%
mutate(
v.clean = if_else(v > 6, NA, v)
)
# fit the final model without the flagged outliers
# (drop_na() removes rows with NA in ANY column, i.e. the flagged rows here)
m.v2 = brm(v.clean ~ diagnosis * phase + (1 | s),
seed = 1234, prior = priors,
data = df.lng %>% drop_na(),
iter = iter, warmup = warm,
backend = "cmdstanr", threads = threading(2),
file = file.path(brms_dir, "m_ddm_v2"),
save_pars = save_pars(all = TRUE)
)
# side-by-side table of the original and the outlier-cleaned model
tab_model(m.v, m.v2)
| | v | v.clean | ||
|---|---|---|---|---|
| Predictors | Estimates | CI (95%) | Estimates | CI (95%) |
| Intercept | 2.31 | 2.16 – 2.47 | 2.31 | 2.17 – 2.44 |
| diagnosis1 | -0.27 | -0.48 – -0.07 | -0.26 | -0.46 – -0.06 |
| diagnosis2 | -0.05 | -0.26 – 0.16 | -0.03 | -0.21 – 0.16 |
| phase1 | -0.03 | -0.09 – 0.04 | -0.01 | -0.07 – 0.04 |
| phase2 | 0.04 | -0.02 – 0.11 | 0.05 | -0.00 – 0.11 |
| diagnosis1:phase1 | 0.08 | -0.01 – 0.17 | 0.07 | -0.01 – 0.15 |
| diagnosis2:phase1 | -0.01 | -0.11 – 0.08 | -0.03 | -0.10 – 0.05 |
| diagnosis1:phase2 | 0.01 | -0.08 – 0.11 | -0.00 | -0.08 – 0.08 |
| diagnosis2:phase2 | -0.03 | -0.12 – 0.06 | -0.04 | -0.12 – 0.04 |
| Random Effects | ||||
| σ2 | 0.32 | 0.29 | ||
| τ00 | 0.18 | 0.14 | ||
| ICC | 0.64 | 0.67 | ||
| N | 66 s | 66 s | ||
| Observations | 198 | 197 | ||
| Marginal R2 / Conditional R2 | 0.138 / 0.785 | 0.138 / 0.817 | ||
# repeat the group hypotheses on the outlier-cleaned model
# COMP > ADHD
e1 = hypothesis(m.v2, "0 > 2*diagnosis1 + diagnosis2", alpha = 0.025)
e1$hypothesis
## Hypothesis Estimate Est.Error CI.Lower CI.Upper
## 1 (0)-(2*diagnosis1+diagnosis2) > 0 0.5445833 0.1816275 0.1865946 0.8933183
## Evid.Ratio Post.Prob Star
## 1 726.2727 0.998625 *
# COMP > BOTH
e2 = hypothesis(m.v2, "0 > diagnosis1 + 2*diagnosis2", alpha = 0.025)
e2$hypothesis
## Hypothesis Estimate Est.Error CI.Lower CI.Upper
## 1 (0)-(diagnosis1+2*diagnosis2) > 0 0.3091633 0.1710532 -0.03789075 0.6385828
## Evid.Ratio Post.Prob Star
## 1 23.31611 0.958875
# ADHD < BOTH
e3 = hypothesis(m.v2, "diagnosis1 < diagnosis2", alpha = 0.025)
e3$hypothesis
## Hypothesis Estimate Est.Error CI.Lower CI.Upper
## 1 (diagnosis1)-(diagnosis2) < 0 -0.2354199 0.1711057 -0.5644204 0.100727
## Evid.Ratio Post.Prob Star
## 1 11.28879 0.918625
# combine diagnosis and medication
# keep only the two ADHD groups; `except = "phase"` tells droplevels() to
# leave the phase factor untouched, preserving its contr.sum contrasts
df.lng.sel = df.lng %>%
filter(diagnosis != "COMP") %>% droplevels(except = "phase")
# print the (retained) phase contrasts
contrasts(df.lng.sel$phase)
## [,1] [,2]
## prevolatile 1 0
## volatile 0 1
## postvolatile -1 -1
# sum coding with rows reordered so that FALSE = -1 and TRUE = 1
# (assumes adhd.meds.bin is a factor with levels FALSE/TRUE — see printout)
contrasts(df.lng.sel$adhd.meds.bin) = contr.sum(2)[c(2,1)]
contrasts(df.lng.sel$adhd.meds.bin)
## [,1]
## FALSE -1
## TRUE 1
# update formula: medication status replaces diagnosis as predictor
f.v = brms::bf(v ~ adhd.meds.bin * phase + (1|s))
As the next step, we fit the model, check whether there are divergence or rhat issues, and then check whether the chains have converged.
# fit the group model (ADHD/BOTH participants only, medication as predictor;
# df.lng.sel is matched positionally to brm's `data` argument, and the fit is
# cached on disk via `file =`)
m.v3 = brm(f.v, seed = 4664,
df.lng.sel, prior = priors,
iter = iter, warmup = warm,
backend = "cmdstanr", threads = threading(2),
file = file.path(brms_dir, "m_ddm_v3"),
save_pars = save_pars(all = TRUE)
)
# sampler diagnostics for the medication model
rstan::check_hmc_diagnostics(m.v3$fit)
##
## Divergences:
## 0 of 8000 iterations ended with a divergence.
##
## Tree depth:
## 0 of 8000 iterations saturated the maximum tree depth of 10.
##
## Energy:
## E-BFMI indicated no pathological behavior.
# check that rhats are below 1.01
sum(brms::rhat(m.v3) >= 1.01, na.rm = T)
## [1] 0
# check the trace plots (NOTE: this overwrites the m.v draws in post.draws)
post.draws = as_draws_df(m.v3)
mcmc_trace(post.draws, regex_pars = "^b_",
facet_args = list(ncol = 3)) +
scale_x_continuous(breaks=scales::pretty_breaks(n = 3)) +
scale_y_continuous(breaks=scales::pretty_breaks(n = 3))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
This model has no pathological behaviour with E-BFMI, no divergent
sample and no rhats that are higher or equal to 1.01. Therefore, we go
ahead and perform our posterior predictive checks.
# Posterior predictive checks for m.v3: density overlay plus distributions
# of predicted means per medication group and per phase.
post.pred <- posterior_predict(m.v3, ndraws = nsim)
# shared styling: plain theme without a legend
no.legend <- theme_bw() + theme(legend.position = "none")
# overall fit of predicted vs observed drift-rate distribution
panel.dens <- pp_check(m.v3, ndraws = nsim) + no.legend
# distributions of means compared to the real values per medication group
panel.meds <- ppc_stat_grouped(df.lng.sel$v, post.pred, df.lng.sel$adhd.meds.bin) + no.legend
# ... and per phase
panel.phase <- ppc_stat_grouped(df.lng.sel$v, post.pred, df.lng.sel$phase) + no.legend
combined <- ggarrange(panel.dens, panel.meds, panel.phase,
                      ncol = 1, labels = "AUTO")
annotate_figure(combined, top = text_grob("Posterior predictive checks",
                                          face = "bold", size = 14))
Now that we are convinced that we can trust our model, we have a look at its estimates and use the hypothesis function to assess our hypotheses and perform exploratory tests.
# print a summary of the medication model
summary(m.v3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: v ~ adhd.meds.bin * phase + (1 | s)
## Data: df.lng.sel (Number of observations: 132)
## Draws: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
## total post-warmup draws = 8000
##
## Multilevel Hyperparameters:
## ~s (Number of levels: 44)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.55 0.07 0.44 0.70 1.00 1552 2718
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 2.14 0.09 1.97 2.32 1.00 1049
## adhd.meds.bin1 0.06 0.09 -0.11 0.22 1.00 1057
## phase1 0.01 0.03 -0.06 0.07 1.00 6948
## phase2 0.03 0.03 -0.03 0.10 1.00 7233
## adhd.meds.bin1:phase1 0.01 0.03 -0.06 0.08 1.00 7207
## adhd.meds.bin1:phase2 -0.00 0.03 -0.07 0.06 1.00 7282
## Tail_ESS
## Intercept 2335
## adhd.meds.bin1 1984
## phase1 6272
## phase2 6017
## adhd.meds.bin1:phase1 5843
## adhd.meds.bin1:phase2 5958
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.28 0.02 0.24 0.32 1.00 4994 5476
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# get the estimates and plot the posterior distributions of all coefficients
# except the intercept
post.draws %>%
  select(starts_with("b_"), -b_Intercept) %>%
  pivot_longer(cols = everything(), names_to = "coef", values_to = "estimate") %>%
  mutate(
    coef = fct_reorder(coef, desc(estimate))
  ) %>%
  group_by(coef) %>%
  mutate(
    # a coefficient counts as "credible" when its 95% interval excludes zero
    cred = case_when(
      (mean(estimate) < 0 & quantile(estimate, probs = 0.975) < 0) |
      (mean(estimate) > 0 & quantile(estimate, probs = 0.025) > 0) ~ "credible",
      T ~ "not credible"
    )
  ) %>% ungroup() %>%
  ggplot(aes(x = estimate, y = coef, fill = cred)) +
  geom_vline(xintercept = 0, linetype = 'dashed') +
  ggdist::stat_halfeye(alpha = 0.7) + ylab(NULL) + theme_bw() +
  # FIX: name both fill values explicitly — the original mixed one unnamed and
  # one named entry, relying on fragile positional matching of the remaining
  # level; the mapping (credible = dark, not credible = light) is unchanged
  scale_fill_manual(values = c("credible" = c_dark, "not credible" = c_light)) +
  theme(legend.position = "none")
# rain cloud plot of participant-level drift rates by phase and diagnosis
p = df.lng %>%
  mutate(
    # nicer group label for the figure
    diagnosis = if_else(diagnosis == "BOTH", "ADHD+ASD", diagnosis)
  ) %>%
  ggplot(aes(phase, v, fill = diagnosis, colour = diagnosis)) +
  geom_rain(rain.side = 'r',
            boxplot.args = list(color = "black", outlier.shape = NA, show.legend = FALSE, alpha = .8),
            # FIX: removed outlier.shape from violin.args — it is a boxplot
            # argument (copy-paste) and is not understood by the violin layer
            violin.args = list(color = "black", alpha = .8),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = 0, width = 0.3), width = 0.3
            ),
            point.args = list(show.legend = FALSE, alpha = .5),
            violin.args.pos = list(
              width = 0.6, position = position_nudge(x = 0.16)),
            point.args.pos = list(position = ggpp::position_dodgenudge(x = -0.25, width = 0.1))) +
  scale_fill_manual(values = col.grp) +
  scale_color_manual(values = col.grp) +
  labs(x = "", y = "arbitrary unit") +
  theme_bw() +
  theme(legend.position.inside = c(0.2, 0.8),
        legend.position = "inside", plot.title = element_text(hjust = 0.5),
        legend.direction = "horizontal", text = element_text(size = 15),
        legend.title = element_blank())
annotate_figure(p, top = text_grob("Participant-specific drift rates",
                                   face = "bold", size = 14))
# ggsave without a `plot` argument saves the last plot displayed,
# i.e. the annotated figure printed just above
ggsave(filename = file.path("plots", "FigDDM.svg"),
       units = "cm", width = 27, height = 9)
# include medication into the plot: drift rates by diagnosis and medication,
# facetted by phase
df.lng %>%
  mutate(
    # nicer group label for the figure
    diagnosis = if_else(diagnosis == "BOTH", "ADHD+ASD", diagnosis)
  ) %>%
  ggplot(aes(diagnosis, v, fill = adhd.meds.bin, colour = adhd.meds.bin)) +
  geom_rain(rain.side = 'r',
            boxplot.args = list(color = "black", outlier.shape = NA, show.legend = FALSE, alpha = .8),
            # FIX: removed outlier.shape from violin.args — it is a boxplot
            # argument (copy-paste) and is not understood by the violin layer
            violin.args = list(color = "black", alpha = .8),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = 0, width = 0.3), width = 0.3
            ),
            point.args = list(show.legend = FALSE, alpha = .5),
            violin.args.pos = list(
              width = 0.6, position = position_nudge(x = 0.16)),
            point.args.pos = list(position = ggpp::position_dodgenudge(x = -0.25, width = 0.1))) +
  scale_fill_manual(values = col.cnd) +
  scale_color_manual(values = col.cnd) +
  labs(x = "", y = "arbitrary unit") +
  facet_wrap(. ~ phase) +
  theme_bw() +
  theme(legend.position.inside = c(0.2, 0.8),
        legend.position = "inside", plot.title = element_text(hjust = 0.5),
        legend.direction = "horizontal", text = element_text(size = 15),
        legend.title = element_blank())
# other parameters: boundary separation (a), starting point (z) and
# non-decision time (t0), one facet each, by diagnosis group
df.part %>% select(!starts_with("v")) %>%
  pivot_longer(cols = where(is.numeric)) %>%
  mutate(
    # readable facet labels for the parameter codes
    name = factor(case_match(name,
                             "a" ~ "boundary separation",
                             "z" ~ "starting point",
                             "t0" ~ "non-decision time"
    ))
  ) %>%
  # dummy x = 1: all groups share one position, dodged by fill
  ggplot(aes(1, value, fill = diagnosis, colour = diagnosis)) +
  geom_rain(rain.side = 'r',
            boxplot.args = list(color = "black", outlier.shape = NA, show.legend = FALSE, alpha = .8),
            # FIX: removed outlier.shape from violin.args — it is a boxplot
            # argument (copy-paste) and is not understood by the violin layer
            violin.args = list(color = "black", alpha = .8),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = 0, width = 0.3), width = 0.3
            ),
            point.args = list(show.legend = FALSE, alpha = .5),
            violin.args.pos = list(
              width = 0.6, position = position_nudge(x = 0.16)),
            point.args.pos = list(position = ggpp::position_dodgenudge(x = -0.25, width = 0.1))) +
  scale_fill_manual(values = col.grp) +
  scale_color_manual(values = col.grp) +
  labs(title = "Other DDM parameters", x = "", y = "") +
  facet_wrap(. ~ name, scales = "free") +
  theme_bw() +
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5),
        legend.direction = "horizontal", text = element_text(size = 15),
        axis.text.x = element_blank(), axis.ticks.x = element_blank())